## ----------------------------------------------------------
## Code to clean Bukoba study Qual 1 transcripts for Analysis
## ----------------------------------------------------------

library(dplyr)
library(reshape2)

### Read the anonymized qualitative coding sheet
### (the path is resolved relative to the current working directory)
qualdat <- read.csv(
  file = "../data/bukobaqual1/qual_data_anonymized.csv"
)

### Qualitative data creation process, blind to treatment status (see excel spreadsheet for more details)
#qualdat.coding <- read_xls("qual_data_anonymized.xls")
# 1. pasted all answers into the spreadsheet from the original Word documents
# 2. classified each respondent as (par)ent, (HT) head teacher, (CT) class teacher, or (RS) researcher
# 3. coded as 1 all responses that could plausibly be used as "trace evidence" in theory testing
# 4. coded whether the response was evidence of POSITIVE parent engagement or change, or of a clear LACK of parent engagement or change

unique(qualdat$School) # 18 schools in the qualitative data

## Assign each school to its treatment arm.
# Schools in the first list are labelled "VP", schools in the second list
# "IW", and every other (non-missing) school "SO". A missing School value
# yields NA, exactly as a chain of `==` comparisons would.
qualdat$treat <- with(qualdat, ifelse(
  is.na(School),
  NA,
  ifelse(
    School %in% c("Bujugo", "Kabagara", "Katunga", "Kikomelo",
                  "Ngarama", "Rushaka", "Katwe", "Musira"),
    "VP",
    ifelse(
      School %in% c("Kalema", "Kashozi", "Katale", "Katoju",
                    "Kihumulo", "Kyenge", "Ruhunga", "Kyamulaile"),
      "IW",
      "SO"
    )
  )
))

# Cross-tabulate schools against arms as a visual check of the coding
table(qualdat$School, qualdat$treat)

## Code Schools by whether they were in the top or bottom half of
# their treatment arm with respect to change in Educational Efficacy.
# Schools named below are labelled "Top"; every other (non-missing) school is
# labelled "Bot". A missing School value yields NA.
# NOTE(review): the earlier version of this list named "Musira" twice. The
# duplicate is removed here, which does not change the result -- please confirm
# against the ranking that no other school was meant in its place.
qualdat$topbot <- with(qualdat, ifelse(
  is.na(School),
  NA,
  ifelse(
    School %in% c(
      "Kabagara", "Katunga", "Kikomelo", "Musira",
      "Kalema", "Kashozi", "Kihumulo", "Katoju",
      "Kabirizi", "Rutete", "Butainamwa", "Kasharu"
    ),
    "Top",
    "Bot"
  )
))

## Label "Trace evidence" as 1 for examples of positive change; 0 when respondent said no change
# Rows that are not trace evidence, or that carry neither code, stay NA.
# The tests use `%in% 1` rather than `== 1` so that a blank (NA) cell counts as
# "not coded". With `==`, an NA in positive_parentengage makes the outer
# ifelse() test NA, and ifelse() then returns NA without ever consulting
# lack_parentengage -- so a row coded only as "lack of engagement" would be
# lost instead of being labelled 0.
qualdat$evidence <- with(qualdat, {
  is_trace <- trace_evidence %in% 1
  ifelse(
    is_trace & positive_parentengage %in% 1, 1,
    ifelse(is_trace & lack_parentengage %in% 1, 0, NA)
  )
})

# Remove the rows whose question is about interpreting the study design.
# which() also discards rows where questshort is NA, so none are kept.
qualdat <- qualdat[which(qualdat$questshort != "interp_studydesign"), , drop = FALSE]


### Check whether interviewees contribute a similar number of comments:
### count trace-evidence rows per who_spec, separately for each respondent role
table(qualdat$who_spec[which(qualdat$Who == "HT" & qualdat$trace_evidence == 1)])
table(qualdat$who_spec[which(qualdat$Who == "CT" & qualdat$trace_evidence == 1)])
table(qualdat$who_spec[which(qualdat$Who == "par" & qualdat$trace_evidence == 1)])
# no outliers

### Reshape data so that each observation is an individual interviewee
# Keeps only trace-evidence rows from head teachers, class teachers and
# parents, then collapses to one row per Who / who_spec / treat combination
# (presumably who_spec identifies a single interviewee within a role -- confirm
# against the coding sheet).
#   PositiveChange : sum of positive_parentengage (blank cells ignored)
#   NoChange       : sum of lack_parentengage (blank cells ignored)
#   TotalComments  : number of trace-evidence rows in the group
#   comment_bin    : "two_or_one" for groups with at most two rows,
#                    otherwise "three_or_more"
qualdat_indiv <- qualdat %>%
  filter(trace_evidence == 1, Who %in% c("HT", "CT", "par")) %>%
  group_by(Who, who_spec, treat) %>%
  summarize(
    PositiveChange = sum(positive_parentengage, na.rm = TRUE),
    NoChange = sum(lack_parentengage, na.rm = TRUE),
    TotalComments = n()
  ) %>%
  # summarize() only peels off the last grouping variable; drop the rest
  # explicitly so later steps never run per-group by accident
  ungroup() %>%
  mutate(
    comment_bin = ifelse(TotalComments <= 2, "two_or_one", "three_or_more")
  ) %>%
  as.data.frame()


# Long format: one row per interviewee and change type. The two count columns
# are stacked into `Comments`, with `ChangeType` recording which column each
# value came from.
qualdat_indiv2 <- melt(
  qualdat_indiv,
  id.vars = c("who_spec", "treat", "Who"),
  measure.vars = c("PositiveChange", "NoChange"),
  variable.name = "ChangeType",
  value.name = "Comments"
)
  
# Fix the level order of the grouping factors (treatment arms as VP, IW, SO;
# change type with NoChange first)
qualdat_indiv[["treat"]] <- factor(
  qualdat_indiv[["treat"]],
  levels = c("VP", "IW", "SO")
)
qualdat_indiv2[["treat"]] <- factor(
  qualdat_indiv2[["treat"]],
  levels = c("VP", "IW", "SO")
)
qualdat_indiv2[["ChangeType"]] <- factor(
  qualdat_indiv2[["ChangeType"]],
  levels = c("NoChange", "PositiveChange")
)


